# Loading all necessary libraries for python, pandas, numpy
import pandas as pd
import numpy as np
import re
import itertools
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from collections import Counter
from math import pi
from matplotlib import colors as mcolors
pd.options.display.precision
pd.set_option('display.float_format', lambda x: '%.7f' % x)
# Initializing Bokeh resources inline
from bokeh.resources import INLINE,CDN
import bokeh.io
bokeh.io.output_notebook(INLINE)
# Stretch the notebook container to (almost) the full browser width
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:98% !important; }</style>"))
# Importing necessary libraries for Bokeh
from bokeh.plotting import figure
from bokeh.models import Title,ColumnDataSource, LinearColorMapper, ColorBar, BasicTicker, PrintfTickFormatter,HoverTool,LinearAxis, Range1d,LogAxis,Panel,RadioButtonGroup,CheckboxButtonGroup ,Tabs,CheckboxGroup,CDSView,CustomJS,CustomJSFilter,GeoJSONDataSource,Slider,LogColorMapper,Label,LabelSet,Text, Div
from bokeh.palettes import Viridis256,brewer,YlOrRd9,YlGnBu,Spectral3,Spectral6,magma,viridis,cividis,YlOrRd,RdYlGn,Category20_20,Category20b
from bokeh.transform import transform,linear_cmap,cumsum
from bokeh.layouts import column, row, WidgetBox,grid, layout
from bokeh.io import push_notebook, show, output_notebook,output_file,save
from ipywidgets import interact
from bokeh.application.handlers import FunctionHandler
from bokeh.application import Application
# Load data from NY times : https://github.com/nytimes/covid-19-data
import datetime
data_cases = pd.read_csv('us-states_new.csv')
data_cases['date'] = pd.to_datetime(data_cases['date'], format='%m/%d/%Y')
# Series.dt.week was deprecated in pandas 1.1 and removed in 2.0;
# isocalendar().week yields the same ISO week number. Cast to a plain int so
# later comparisons and group-bys see an ordinary integer column.
data_cases['Week'] = data_cases['date'].dt.isocalendar().week.astype(int)
data_cases['Day'] = data_cases['date'].dt.day
data_cases['Month'] = data_cases['date'].dt.month
# NOTE: the original called data_cases.set_index(['date', 'state']) here and
# discarded the result, so it had no effect. 'date' and 'state' deliberately
# stay ordinary columns because the merges and pivots below use them as such.
data_cases.head(2)
# Loading the united states population data to represent the case ratio per capita
# 'state' is read as the index; the merges below join on that name.
data_population = pd.read_csv('us-population.csv', index_col=['state'])
data_population.head(2)
# Loading the united states list of states to be filtered upon
data_maps = pd.read_csv('us-contiguous-states.csv', index_col=['state'])
data_maps.head(2)
# Merging the datasets
# merge() defaults to an inner join, so only states present in every input remain.
data_maps_pop=data_population.merge(data_maps, left_on='state', right_on='state')
data_covid=data_maps_pop.merge(data_cases, left_on='state', right_on='state')
# These columns are not referenced again in the visible code.
data_covid.drop(columns=['data', 'r','g','b','id'],inplace=True)
data_covid.head(2)
# Calculating cumulative case ratio per capita , based on the data from NY times
# i.e. cumulative cases divided by the population expressed in millions.
data_covid['caseratiocumulative']=data_covid['cases']/(data_covid['population']/1000000)
data_covid=data_covid.sort_values(by=['state','date'], ascending=True)
# Calculating the cases per day as the day-over-day difference of the cumulative count within each state
# diff() leaves the first row of every state as NaN; funchandlefstval (below) patches that row.
data_covid['caseperday']=data_covid.groupby(['state'])['cases'].transform(lambda x: x.diff())
# Creating a function to handle the first value in each group
# (the frame is re-sorted so each state's earliest date is its first row)
data_covid=data_covid.sort_values(by=['state','date'], ascending=True)
def funchandlefstval(group):
    """Seed the first ``caseperday`` value of a group with its first ``cases`` value.

    ``caseperday`` is computed with ``diff()``, which leaves the first row of
    every group as NaN. On that first row the daily count equals the cumulative
    count, so it is copied over from ``cases``.

    Args:
        group: DataFrame for one state, with ``cases`` and ``caseperday``
            columns, ordered so that the earliest row comes first.

    Returns:
        The group with its first ``caseperday`` value fixed. A copy is returned
        when a change is needed; the input frame is never modified.
    """
    # Nothing to patch in an empty group (and .iloc[0] would raise).
    if group.empty:
        return group
    first_cases = group['cases'].iloc[0]
    # NaN != anything is True, so a NaN first value always takes this branch.
    if group['caseperday'].iloc[0] != first_cases:
        # Write through .iloc on a copy instead of through `.values[0]`: the
        # latter relies on the array being a writable view of the frame, which
        # does not hold under pandas copy-on-write.
        group = group.copy()
        group.iloc[0, group.columns.get_loc('caseperday')] = first_cases
    return group
# Patch the first per-day value of every state (see funchandlefstval).
# group_keys=False keeps the original row index: without it, newer pandas
# versions prepend 'state' as an index level, which then clashes with the
# 'state' column in the group-bys that follow.
data_covid = data_covid.groupby('state', group_keys=False).apply(funchandlefstval)
# Scaling the value to population per million to compare the states
data_covid['caseperdayratio'] = data_covid['caseperday'] / (data_covid['population'] / 1000000)
# Calculating the case percentage change by a fortnight within each state
data_covid['casepctchangebyfortnight'] = data_covid.groupby(['state'])['cases'].transform(
    lambda x: x.pct_change(14, fill_method='ffill'))
data_covid.tail(5)
#Rank based on cumulative case ratio to be used for the multi-line plot
# Ranks are computed per date across states; rank1 == 1 is the highest ratio on that date.
data_covid["rankrev1"] =data_covid.groupby("date")["caseratiocumulative"].rank("dense", ascending=True)
data_covid["rank1"] =data_covid.groupby("date")["caseratiocumulative"].rank("dense", ascending=False)
#Rank based on case ratio per day
data_covid["rankrev2"] =data_covid.groupby("date")["caseperdayratio"].rank("dense", ascending=True)
data_covid["rank2"] =data_covid.groupby("date")["caseperdayratio"].rank("dense", ascending=False)
data_covid.head(2)
# Cumulative case dataframe
data_covid_perdaycumulative=data_covid.loc[:,['state','population','date','cases','caseratiocumulative','rankrev1','rank1']]
data_covid_perdaycumulative.head(2)
# Per day cases dataframe
data_covid_perday=data_covid.loc[:,['state','population','date','caseperday','caseperdayratio','cases','Week','Month','rankrev2','rank2']]
data_covid_perday.head(2)
# Creating a dataframe with case ratio rolled up to the week level
data_covid_temp=data_covid_perday.loc[:,['state','population','Week','caseperday']]
# 'population' is part of the group key only so that it is carried through the aggregation.
data_covid_perweek=data_covid_temp.groupby(['state','Week','population'])[['caseperday']].sum()
data_covid_perweek.reset_index(inplace=True)
data_covid_perweek.rename(columns={'caseperday': 'caseperweek'},inplace=True)
# Weekly cases per one million residents.
data_covid_perweek['caseperweekratio']=data_covid_perweek['caseperweek']/(data_covid_perweek['population']/1000000)
data_covid_perweek['rankrev3'] =data_covid_perweek.groupby('Week')['caseperweekratio'].rank("dense", ascending=True)
data_covid_perweek['rank3'] =data_covid_perweek.groupby('Week')['caseperweekratio'].rank("dense", ascending=False)
data_covid_perweek.head(2)
# Creating a dataframe with case ratio rolled up to the month level
data_covid_temp=data_covid_perday.loc[:,['state','population','Month','caseperday']]
data_covid_perMonth=data_covid_temp.groupby(['state','Month','population'])[['caseperday']].sum()
data_covid_perMonth.reset_index(inplace=True)
data_covid_perMonth.rename(columns={'caseperday': 'casepermonth'},inplace=True)
# Monthly cases per one million residents.
data_covid_perMonth['caseperMonthratio']=data_covid_perMonth['casepermonth']/(data_covid_perMonth['population']/1000000)
data_covid_perMonth['rankrev4'] =data_covid_perMonth.groupby('Month')['caseperMonthratio'].rank("dense", ascending=True)
data_covid_perMonth['rank4'] =data_covid_perMonth.groupby('Month')['caseperMonthratio'].rank("dense", ascending=False)
data_covid_perMonth=data_covid_perMonth.sort_values(by=['state','Month'], ascending=True)
data_covid_perMonth.head(2)
# choosing the rows of higher cumulative case ratio values from the dataframe
# Step 1: rows where a state was within the top 5 dense ranks on some date.
data_covid_perdaycumulativetemp1=data_covid_perdaycumulative[(data_covid_perdaycumulative['rank1']<=5)]
data_covid_perdaycumulativetemp1['state'].unique()
# Step 2: keep the full history of every state that appeared in step 1.
data_covid_perdaycumulativetemp1= data_covid_perdaycumulative[data_covid_perdaycumulative.state.isin(data_covid_perdaycumulativetemp1['state'].unique())]
#transpose cumulative plot to produce multiple bokeh lines
data_covid_perdaycumulativeP=data_covid_perdaycumulativetemp1.loc[:,['date','state','caseratiocumulative']]
# Wide layout: one row per date, one column per state.
data_covid_perdaycumulativeP=data_covid_perdaycumulativeP.pivot(index='date',columns='state', values='caseratiocumulative')
# After reset_index 'date' is the first column, followed by the state columns.
data_covid_perdaycumulativeP.reset_index(inplace=True)
data_covid_perdaycumulativeP = data_covid_perdaycumulativeP.rename_axis("idx", axis="columns")
data_covid_perdaycumulativeP.tail(2)
import bokeh.plotting as bop
# Plot 1 for line plot with interactive feature
def bkplt1():
    """Build "Figure 1": cumulative case ratio over time for the high-ratio states.

    Reads the module-level ``data_covid_perdaycumulativeP`` frame, whose first
    column is ``date`` and whose remaining columns hold one series per state,
    and draws one log-scale line per state column.

    Returns:
        The configured Bokeh figure.
    """
    dfp1 = data_covid_perdaycumulativeP
    # Every column after the leading 'date' column becomes its own line.
    state_columns = list(dfp1.columns[1:])
    # make the figure
    TOOLS = 'crosshair,save,pan,box_zoom,reset,wheel_zoom'
    p1 = figure(title="States with High case Ratio (Covid-19)", y_axis_type="log",
                x_axis_type='datetime', tools=TOOLS, plot_width=1000, plot_height=700)
    # loop through our columns and colours
    line_dash_styles = [[10, 0], [20, 1], [10, 1], [5, 1]]
    # Cycle the 20-colour palette: slicing it to the number of lines made zip()
    # silently drop every state beyond the twentieth.
    styled_columns = zip(state_columns,
                         itertools.cycle(Category20_20),
                         itertools.cycle(line_dash_styles))
    for columnnames, colore, line_dash in styled_columns:
        line = p1.line(dfp1['date'], dfp1[columnnames], legend_label=columnnames,
                       color=colore, line_width=2, line_dash=line_dash)
        # One hover tool per line so the tooltip can name that line's state.
        p1.add_tools(HoverTool(tooltips=[('State', columnnames)], renderers=[line]))
    # Axes
    p1.xaxis.axis_label = 'Date'
    p1.xaxis.axis_label_text_font = "Segoe UI"
    p1.yaxis.axis_label = 'Case Ratio'
    p1.yaxis.axis_label_text_font = "Segoe UI"
    p1.yaxis.major_label_orientation = "vertical"
    p1.xaxis.axis_line_width = 0.5
    p1.xaxis.axis_line_color = "black"
    p1.yaxis.major_label_text_color = "Black"
    # Title
    p1.title.text_color = "Black"
    p1.title.text_font = "Segoe UI"
    p1.title.text_font_style = "bold italic"
    p1.title.align = 'center'
    p1.title.text_font_size = "10pt"
    # Frame
    p1.background_fill_alpha = 0.1
    p1.border_fill_color = "whitesmoke"
    p1.min_border_left = 20
    p1.outline_line_width = 1
    p1.outline_line_alpha = 0.5
    p1.outline_line_color = "grey"
    # Define Legend
    p1.legend.location = "top_left"
    p1.legend.click_policy = "hide"
    p1.legend.inactive_fill_color = 'gray'
    p1.legend.inactive_fill_alpha = 0.4
    p1.legend.label_text_font_size = "8pt"
    p1.legend.title = 'States'
    # "bold italic" is a font *style*; the original assigned it to
    # title_text_font, which expects a font family name.
    p1.legend.title_text_font_style = "bold italic"
    p1.add_layout(Title(text="Figure 1", text_font_size="12pt", text_font_style='italic',
                        text_color='blue'), 'below')
    return p1
# Build plot 1 once; the returned figure is not kept here.
bkplt1()
# Step 1: rows where a state's dense rank (1 = highest ratio) was 44 or worse on some date.
data_covid_perdaycumulativetemp2=data_covid_perdaycumulative[(data_covid_perdaycumulative['rank1']>=44)]
data_covid_perdaycumulativetemp2['state'].unique()
# choosing the rows of lower cumulative case ratio values from the dataframe
# Step 2: keep the full history of every state that appeared in step 1.
data_covid_perdaycumulativetemp2= data_covid_perdaycumulative[data_covid_perdaycumulative.state.isin(data_covid_perdaycumulativetemp2['state'].unique())]
data_covid_perdaycumulativeL=data_covid_perdaycumulativetemp2.loc[:,['date','state','caseratiocumulative']]
# Wide layout: one row per date, one column per state.
data_covid_perdaycumulativeL=data_covid_perdaycumulativeL.pivot(index='date',columns='state', values='caseratiocumulative')
# After reset_index 'date' is the first column, followed by the state columns.
data_covid_perdaycumulativeL.reset_index(inplace=True)
data_covid_perdaycumulativeL = data_covid_perdaycumulativeL.rename_axis("idx", axis="columns")
data_covid_perdaycumulativeL.tail(2)
# Plot 2 for line plot with interactive feature
def bkplt2():
    """Build "Figure 2": cumulative case ratio over time for the low-ratio states.

    Reads the module-level ``data_covid_perdaycumulativeL`` frame, whose first
    column is ``date`` and whose remaining columns hold one series per state,
    and draws one log-scale line per state column.

    Returns:
        The configured Bokeh figure.
    """
    dfp2 = data_covid_perdaycumulativeL
    # Every column after the leading 'date' column becomes its own line.
    state_columns = list(dfp2.columns[1:])
    # make the figure (the original also created a throwaway `figure()` first)
    TOOLS = 'crosshair,save,pan,box_zoom,reset,wheel_zoom'
    p2 = figure(title="States with Low case Ratio (Covid-19)", y_axis_type="log",
                x_axis_type='datetime', tools=TOOLS, plot_width=1000, plot_height=700)
    # loop through our columns and colours
    line_dash_styles = [[10, 0], [20, 1], [10, 1], [5, 1]]
    # Cycle the 20-colour palette: slicing it to the number of lines made zip()
    # silently drop every state beyond the twentieth.
    styled_columns = zip(state_columns,
                         itertools.cycle(Category20_20),
                         itertools.cycle(line_dash_styles))
    for columnnames, colore, line_dash in styled_columns:
        line = p2.line(dfp2['date'], dfp2[columnnames], legend_label=columnnames,
                       color=colore, line_width=2, line_dash=line_dash)
        # One hover tool per line so the tooltip can name that line's state.
        p2.add_tools(HoverTool(tooltips=[('State', columnnames)], renderers=[line]))
    # Axes
    p2.xaxis.axis_label = 'Date'
    p2.xaxis.axis_label_text_font = "Segoe UI"
    p2.yaxis.axis_label = 'Case Ratio'
    p2.yaxis.axis_label_text_font = "Segoe UI"
    p2.yaxis.major_label_orientation = "vertical"
    p2.xaxis.axis_line_width = 0.5
    p2.xaxis.axis_line_color = "black"
    p2.yaxis.major_label_text_color = "Black"
    # Title
    p2.title.text_color = "Black"
    p2.title.text_font = "Segoe UI"
    p2.title.text_font_style = "bold italic"
    p2.title.align = 'center'
    p2.title.text_font_size = "10pt"
    # Frame
    p2.background_fill_color = "beige"
    p2.background_fill_alpha = 0.1
    p2.border_fill_color = "whitesmoke"
    p2.min_border_left = 20
    p2.outline_line_width = 1
    p2.outline_line_alpha = 0.5
    p2.outline_line_color = "grey"
    # Define Legend
    p2.legend.location = "top_left"
    p2.legend.click_policy = "hide"
    p2.legend.inactive_fill_color = 'gray'
    p2.legend.inactive_fill_alpha = 0.4
    p2.legend.label_text_font_size = "8pt"
    p2.legend.title = 'States'
    # "bold italic" is a font *style*; the original assigned it to
    # title_text_font, which expects a font family name.
    p2.legend.title_text_font_style = "bold italic"
    p2.add_layout(Title(text="Figure 2", text_font_size="12pt", text_font_style='italic',
                        text_color='blue'), 'below')
    return p2
# Build plot 2 once; the returned figure is not kept by this call.
bkplt2()
# Fresh figures for the tabbed layout.
p1=bkplt1()
p2=bkplt2()
layoutl=layout(Div(text='Time Series plot1', style={'font-size': '100%', 'color': 'blue','align-self': 'flex-end'}),[[p1]])
layout2=layout(Div(text='Time Series plot2', style={'font-size': '100%', 'color': 'blue','align-self': 'flex-end'}),[[p2]])
tab1 = Panel(child=layoutl, title="Time Series plot1")
tab2 = Panel(child=layout2, title="Time Series plot2")
Finlayout=Tabs(tabs=[tab1,tab2],sizing_mode='scale_width')
# Show the tabs in the notebook and write them to a standalone HTML file.
# If the first attempt fails, retry once without resetting the output state.
# NOTE(review): notebook_handle is an option of show(); confirm that save()
# accepts it in the installed Bokeh version.
try:
    bop.reset_output()
    bop.output_notebook()
    bop.show(Finlayout)
    # create an output file
    output_file('Covid-19_TS_Plot_Jan_2020_July_2020.html')
    save(Finlayout,notebook_handle=True)
except Exception:  # was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit
    bop.output_notebook()
    bop.show(Finlayout)
    # create an output file
    output_file('Covid-19_TS_Plot_Jan_2020_July_2020.html')
    save(Finlayout,notebook_handle=True)
# Refer to site for plot: https://akrishn986.github.io/Covid-19_TS_Plot_Jan_2020_July_2020.html
It is evident that New York, New Jersey, Massachusetts, Rhode Island and Washington, D.C. have higher case ratios and that their curves have flattened. Around June, the number of cases increased drastically in Arizona, Louisiana and Florida, and it appears to be still increasing. These are the states of interest that we can consider in our Granger analysis.
Montana, Missouri, Kentucky, Maine and New Hampshire have fewer cases per capita than most other states in the United States.
# Import geopandas for maps
import geopandas as gpd
# State boundary shapes; the 'NAME' column is used further down to join with the covid frames.
contiguous_usa = gpd.read_file('mapssrc/cb_2018_us_state_20m.shp')
Another way of analyzing the most and least affected states is to identify the states that have consistently had a low or a high number of cases over the entire 30 weeks of 2020.
# Creating a dataframe with the average case ratio to date, based on the cases-per-day field
# 'population' is part of the group key only so that it is carried through the aggregation.
data_covid_perdaymap0 = data_covid_perday.groupby(['state','population'])['caseperday'].mean()
data_covid_perdaymap0=data_covid_perdaymap0.to_frame()
data_covid_perdaymap0.rename(columns={'caseperday': 'caseperdayavg'},inplace=True)
data_covid_perdaymap0=data_covid_perdaymap0.reset_index()
# Average daily cases per one million residents.
data_covid_perdaymap0['caseperdayavgov']=(data_covid_perdaymap0['caseperdayavg'])/(data_covid_perdaymap0['population']/1000000)
#merging covid data with the shape file for state boundaries.
# The shapefile's 'NAME' column is matched against the covid frame's 'state' column.
data_covid_perdaymap = contiguous_usa.merge(data_covid_perdaymap0, left_on = 'NAME', right_on = 'state')
data_covid_perdaymap=data_covid_perdaymap.sort_values(by=['state'], ascending=True)
data_covid_perdaymap.head(2)
# Plot 3 for choropleth
def bkplt3():
    """Draw "Figure 3": a choropleth of average daily cases per million, by state.

    Reads the module-level ``data_covid_perdaymap`` GeoDataFrame, colours each
    state by its ``caseperdayavgov`` value on a logarithmic colour scale, shows
    the figure in the notebook and saves it as a standalone HTML file.

    Returns:
        None.
    """
    # Define color palettes
    palettep3 = brewer['Reds'][9]
    palettep3 = palettep3[::-1]  # reverse order of colors so higher values have darker colors
    # LogColorMapper maps the values onto the palette on a logarithmic scale.
    color_mapperp3 = LogColorMapper(palette=palettep3,
                                    low=data_covid_perdaymap['caseperdayavgov'].min(),
                                    high=data_covid_perdaymap['caseperdayavgov'].max())
    # Input GeoJSON source that contains features for plotting
    geosourceavg = GeoJSONDataSource(geojson=data_covid_perdaymap.to_json())
    # Define custom tick labels for color bar.
    tick_labelsp = {'0': '0', '50': '50', '80': '80', '100': '100', '150': '150', '200': '200', '300': '300'}
    # Create color bar.
    color_bar = ColorBar(color_mapper=color_mapperp3,
                         label_standoff=8,
                         width=700, height=20,
                         border_line_color=None,
                         location=(0, 0),
                         orientation='horizontal',
                         major_label_overrides=tick_labelsp)
    # Create figure object (the original also created a throwaway `figure()` first).
    p3 = figure(title='Covid-19 Average Of Total cases by day per capita (Jan 2020 - July 2020)',
                plot_height=700,
                plot_width=700,
                toolbar_location='below',
                tools="crosshair,save,pan,box_zoom,reset,wheel_zoom")
    p3.xgrid.grid_line_color = None
    p3.ygrid.grid_line_color = None
    # Add patch renderer to figure.
    states = p3.patches('xs', 'ys', source=geosourceavg,
                        fill_color={'field': 'caseperdayavgov',
                                    'transform': color_mapperp3},
                        line_color='gray',
                        line_width=0.25,
                        fill_alpha=1)
    p3.title.text_color = "Black"
    p3.title.text_font = "Segoe UI"
    p3.title.text_font_style = "bold italic"
    p3.title.align = 'center'
    p3.title.text_font_size = "10pt"
    # Create hover tool
    p3.add_tools(HoverTool(renderers=[states],
                           tooltips=[('State', '@state'),
                                     ('Average cases by day', '@caseperdayavgov')]))
    p3.add_layout(color_bar, 'below')
    p3.add_layout(Title(text="Figure 3", text_font_size="12pt", text_font_style='italic',
                        text_color='blue'), 'below')

    def _show_and_save():
        # Display in the notebook, then write the standalone HTML file.
        # NOTE(review): notebook_handle is an option of show(); confirm that
        # save() accepts it in the installed Bokeh version.
        bop.output_notebook()
        bop.show(p3)
        # create an output file
        output_file('Covid-19_average_cases_per_capita_Jan_2020_July_2020.html')
        save(p3, notebook_handle=True)

    try:
        bop.reset_output()
        _show_and_save()
    except Exception:  # was a bare `except:`; retry once without resetting the output state
        _show_and_save()
    #https://akrishn986.github.io/Covid19-Cause-EffectAnalysis/Covid-19_Percentage_of_average_cases_by_day_per_capita_Jan_2020_July_2020.html
bkplt3()
Similar to the previous plot, this map reaffirms that New York has had a high average number of cases, while Florida and Louisiana are observed to be a close second. The single-image view can be expanded to multiple images per week or day to show more detail. For that reason, choropleth maps comparing the weekly mobility and case ratio side by side, with a small lag, were chosen as the method for visualizing COVID data from a mobility-pattern perspective.
Another way to identify glaring anomalies in the weekly case numbers is to look at the case ratio per week of the year 2020. The data has been scaled per 1,000,000 of the population.
# Span between the first and last week number in the data (max - min):
data_covid_perweek['Week'].max()-data_covid_perweek['Week'].min()
# Minimum week number in the dataset
data_covid_perweek['Week'].min()
# Maximum week number in the dataset
data_covid_perweek['Week'].max()
# Number of states with data in each week (shows from which week almost all states are affected):
data_covid_perweek.groupby("Week")["state"].count()
# Plot 4 for tabbed donut charts
def bkplt4():
    """Draw "Figure 4.x": tabbed donut charts of weekly case ratio per state.

    One donut is built for every week from 7 to 30 (inclusive) out of the
    module-level ``data_covid_perweek`` frame. The donuts are arranged four per
    tab in a 2x2 grid, shown in the notebook and saved as a standalone HTML
    file.

    Returns:
        None.
    """
    first_week, last_week = 7, 30
    weeks_per_tab = 4
    TOOLS = 'crosshair,save,pan,box_zoom,reset,wheel_zoom,hover'
    plots4 = []
    for i in range(first_week, last_week + 1):
        week_rows = data_covid_perweek[data_covid_perweek['Week'] == i]
        data = (week_rows.loc[:, ['state', 'caseperweekratio']]
                .sort_values(by=['caseperweekratio'], ascending=False)
                .reset_index(drop=True))
        # Each state's share of the week's total, expressed as a wedge angle.
        data['angle'] = data['caseperweekratio'] / data['caseperweekratio'].sum() * 2 * pi
        # Repeat the 20-colour palette as often as needed. The original grew a
        # DataFrame with DataFrame.append, which was removed in pandas 2.0.
        data['color'] = list(itertools.islice(itertools.cycle(Category20b[20]), len(data)))
        p4 = figure(plot_height=500, plot_width=600,
                    title="Analysis Of Covid Cases Per Week{} by State ".format(i), tools=TOOLS,
                    tooltips=[("State", "@state"), ("case per week ratio", "@caseperweekratio")])
        p4.annular_wedge(x=0, y=1, inner_radius=0.2, outer_radius=0.7,
                         start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
                         line_color="white", fill_color='color', legend_field='state', source=data)
        p4.axis.axis_label = None
        p4.axis.formatter.use_scientific = False
        p4.axis.visible = False
        p4.grid.grid_line_color = None
        p4.title.text_color = "darkBlue"
        p4.title.text_font = "Segoe UI"
        p4.title.text_font_style = "bold italic"
        p4.title.align = 'center'
        p4.title.text_font_size = "10pt"
        p4.background_fill_color = "black"
        p4.background_fill_alpha = 0.1
        p4.border_fill_color = "whitesmoke"
        p4.min_border_left = 20
        p4.outline_line_width = 1
        p4.outline_line_alpha = 0.5
        p4.outline_line_color = "grey"
        p4.legend[0].label_text_font_size = "8pt"
        p4.legend.click_policy = "hide"
        p4.legend.inactive_fill_color = 'gray'
        p4.legend.inactive_fill_alpha = 0.4
        # Move the legend out of the plot area, to the left of the donut.
        p4.add_layout(p4.legend[0], 'left')
        plots4.append(p4)

    # Four consecutive weeks per tab, laid out column-wise:
    #   [[week k,   week k+2],
    #    [week k+1, week k+3]]
    tabs = []
    for tab_no, start in enumerate(range(0, len(plots4), weeks_per_tab), start=1):
        top_left, bottom_left, top_right, bottom_right = plots4[start:start + weeks_per_tab]
        tab_layout = layout(
            Div(text='Figure 4.{}'.format(tab_no),
                style={'font-size': '100%', 'color': 'blue', 'align-self': 'flex-end'}),
            [[top_left, top_right], [bottom_left, bottom_right]])
        tab_title = "Weeks {}-{} Of CoVID for 2020".format(
            first_week + start, first_week + start + weeks_per_tab - 1)
        tabs.append(Panel(child=tab_layout, title=tab_title))
    Finlayout4 = Tabs(tabs=tabs, sizing_mode='scale_width')

    def _show_and_save():
        # Display in the notebook, then write the standalone HTML file.
        # NOTE(review): notebook_handle is an option of show(); confirm that
        # save() accepts it in the installed Bokeh version.
        bop.output_notebook()
        bop.show(Finlayout4)
        # create an output file
        output_file('Covid-19_average_cases_by_week_per_capita_Jan_2020_July_2020.html')
        save(Finlayout4, notebook_handle=True)

    try:
        bop.reset_output()
        _show_and_save()
    except Exception:  # was a bare `except:`; retry once without resetting the output state
        _show_and_save()
    # Refer to site for plot: https://akrishn986.github.io/Covid19-Cause-EffectAnalysis/Covid-19_average_cases_by_week_per_capita_Jan_2020_July_2020.html
bkplt4()
We can see that, as time progresses, the cases per week have reduced for New York and New Jersey, as seen in tab 4 between weeks 19 and 22. In order to analyze cause and effect, we will have to consider the states that are ranked higher than most others in terms of number of cases over the span of time. Hence, based on the above, we can conclude that looking at the cumulative cases alone does not provide enough evidence to allow us to pick the following for our comparison:
Top 4 Highly affected States:
New York
New Jersey
District Of Columbia
Rhode Island
Bottom 4 Least Affected States:
Idaho
Oregon
Maine
Montana
The donut chart provides a good idea of the spread and the significant states; however, we need to look at multiple plots to consolidate the states that are prominent. When looking at the data at a monthly level, we can get a quick overview of the rapid changes, which reaffirms the states that need to be considered for the Granger analysis. Another drawback of the donut chart is that, although we can observe the bigger shares of the donut immediately, the least affected states are not easy to spot. And so, the strip plot in Figure 10 can help consolidate the least affected states as well.
We would need to investigate further at another aggregated level: by month.
data_covid_perMonth.head(2)
# Plot 5 Time series stripplot
def bkplt5():
    """Draw "Figure 5": a month-by-state strip plot of the monthly case ratio.

    Reads the module-level ``data_covid_perMonth`` frame. Every (month, state)
    pair is drawn as a marker whose size and colour both encode
    ``caseperMonthratio`` divided by 100. The figure is shown in the notebook
    and saved as a standalone HTML file.

    Returns:
        None.
    """
    # scaling data to produce visual.
    scale = 100
    df = data_covid_perMonth.copy()
    df['caseperMonthratio'] = df['caseperMonthratio'] / scale
    # Sort while Month is still numeric: sorting the string form would place
    # month '10' before month '2'. The categorical axis needs strings, so the
    # column is converted only afterwards.
    df = df.sort_values(by=['Month', 'caseperMonthratio'])
    df['Month'] = df['Month'].astype(str)
    source = ColumnDataSource(df)
    # Axis factors follow the sorted row order.
    month_factors = list(df['Month'].unique())
    state_factors = list(df['state'].unique())
    p5 = figure(x_range=month_factors, y_range=state_factors, plot_height=900, plot_width=900,
                title="Analysis Of Covid Cases Per Month by State")
    color_mapper = LinearColorMapper(palette=Viridis256,
                                     low=df['caseperMonthratio'].min(),
                                     high=df['caseperMonthratio'].max())
    color_bar = ColorBar(color_mapper=color_mapper,
                         location=(0, 0),
                         ticker=BasicTicker())
    p5.add_layout(color_bar, 'right')
    p5.scatter(x='Month', y='state', size='caseperMonthratio',
               fill_color=transform('caseperMonthratio', color_mapper), source=source)
    p5.add_tools(HoverTool(tooltips=[('caseperMonthratio', '@caseperMonthratio'), ('state', '@state')]))
    p5.xaxis.axis_label = 'Month'
    p5.xaxis.axis_label_text_font = "Segoe UI"
    p5.yaxis.axis_label = 'States'
    p5.yaxis.axis_label_text_font = "Segoe UI"
    p5.grid.grid_line_color = None
    p5.title.text_color = "darkBlue"
    p5.title.text_font = "Segoe UI"
    p5.title.text_font_style = "bold italic"
    p5.title.align = 'center'
    p5.title.text_font_size = "10pt"
    p5.background_fill_color = "White"
    p5.background_fill_alpha = 0.1
    p5.border_fill_color = "whitesmoke"
    p5.min_border_left = 20
    p5.outline_line_width = 1
    p5.outline_line_alpha = 0.5
    p5.outline_line_color = "grey"
    p5.add_layout(Title(text="Figure 5", text_font_size="12pt", text_font_style='italic',
                        text_color='blue'), 'below')

    def _show_and_save():
        # Display in the notebook, then write the standalone HTML file.
        # NOTE(review): notebook_handle is an option of show(); confirm that
        # save() accepts it in the installed Bokeh version.
        bop.output_notebook()
        bop.show(p5)
        # create an output file
        output_file('Covid-19_average_cases_by_Month_per_capita_Jan_2020_July_2020.html')
        save(p5, notebook_handle=True)

    try:
        bop.reset_output()
        _show_and_save()
    except Exception:  # was a bare `except:`; retry once without resetting the output state
        _show_and_save()
    # Refer to site for plot: https://akrishn986.github.io/Covid19-Cause-EffectAnalysis/19_average_cases_by_Month_per_capita_Jan_2020_July_2020.html
bkplt5()
Month 3: New York and New Jersey show the same pattern as seen in the previous plots.
Month 4: The cases in New York, New Jersey and Massachusetts increase exponentially.
Month 5: D.C., Rhode Island and Connecticut follow suit.
Month 6: Arizona trails closely behind. Finally, in Month 7, Florida and Louisiana join the list.
The mobility data has been downloaded from here: https://github.com/descarteslabs/DL-COVID-19
m50: The median of the max-distance mobility for all samples in the specified region.
This data will be used to compare against the case ratio per day for the least and most affected states pulled in section 5.
data_covid_perday.head(4)
# Loading Mobility data from Descarte labs
data_DL_Mobility = pd.read_csv('DL-us-mobility-daterow.csv')
# Keep only US rows at admin level 1. The explicit .copy() makes the result an
# independent frame, so the column assignment and in-place drop below do not
# act on a slice of the raw frame (SettingWithCopyWarning).
data_DL_Mobility = data_DL_Mobility[(data_DL_Mobility['country_code'] == 'US')
                                    & (data_DL_Mobility['admin_level'] == 1)].copy()
data_DL_Mobility.head(2)
# Change date format
data_DL_Mobility['date'] = pd.to_datetime(data_DL_Mobility['date'], format='%m/%d/%Y')
data_DL_Mobility.drop(columns=['admin2'], inplace=True)
# Merge with covid per day data (mobility 'admin1' is matched against the covid 'state' column)
data_DL_Mobilityplt = data_DL_Mobility.merge(data_covid_perday, left_on=['admin1', 'date'],
                                             right_on=['state', 'date'])
data_DL_Mobilityplt.head(2)
#m50: The median of the max-distance mobility for all samples in the specified region.
# Dual axis Line and Bar chart
# function to load plot presets
def plotpreset(state):
    """Create the pre-styled base figure for one state's mobility/case chart.

    The y axis runs from 0 to just above the largest ``m50`` value in the
    module-level ``data_DL_Mobilityplt`` frame, and the x axis spans the full
    date range of ``data_DL_Mobility``.

    Args:
        state: State name, appended to the figure title.

    Returns:
        The configured Bokeh figure, without any glyphs.
    """
    fig = figure(
        title="Statewise Mobility Index Vs Covid Case ratio {}".format(state),
        y_axis_type="linear",
        x_axis_type='datetime',
        tools='crosshair,save,pan,box_zoom,reset,wheel_zoom',
        plot_width=700,
        plot_height=500,
    )
    # Axis labels
    fig.xaxis.axis_label = 'Date'
    fig.yaxis.axis_label = 'Median m50'
    # Fixed axis ranges
    fig.y_range.start = 0
    fig.y_range.end = int(data_DL_Mobilityplt['m50'].max() + 1)
    fig.x_range.start = data_DL_Mobility['date'].min()
    fig.x_range.end = data_DL_Mobility['date'].max()
    # Title styling
    title = fig.title
    title.text_color = "Black"
    title.text_font = "Segoe UI"
    title.text_font_style = "bold italic"
    title.align = 'center'
    title.text_font_size = "10pt"
    return fig
# Colours shared by the dual-axis mobility charts
def color():
    """Return the ``(bar colour, line colour)`` pair used by the mobility charts."""
    return 'steelblue', 'darkred'
# function for the left hand side of the interactive plot
def updateL(instateL):
    """Build the left-hand ("Figure 6.1") mobility-vs-cases chart for one state.

    Mobility (``m50``) is drawn as bars on the primary y axis and cases per day
    as a dashed line on a secondary y axis.

    Args:
        instateL: State name used to filter ``data_DL_Mobilityplt``.

    Returns:
        The configured Bokeh figure.
    """
    colorv, line_colorl = color()
    p6 = plotpreset(instateL)
    df = data_DL_Mobilityplt[data_DL_Mobilityplt['state'] == instateL]
    # Both glyphs share one data source so the hover tool can look up
    # @caseperday and @m50. The original passed raw arrays and left the source
    # unused, so its tooltips could not resolve any field.
    source = ColumnDataSource(df[['date', 'caseperday', 'm50']])
    # Secondary axis scaled to this state's own cases-per-day range.
    p6.extra_y_ranges = {"caseperday": Range1d(start=df['caseperday'].min(), end=df['caseperday'].max())}
    p6.add_layout(LinearAxis(y_range_name="caseperday", axis_label="Covid 19 Case Ratio"), 'right')
    p6.line(x='date', y='caseperday', source=source,
            legend_label="Cases per day {}".format(instateL), y_range_name="caseperday",
            line_dash='dashed', line_alpha=1, line_width=3, line_color=line_colorl)
    # Bar width is one day expressed in milliseconds (datetime axis units).
    p6.vbar(x='date', top='m50', source=source,
            legend_label="Mobility Median-50 {}".format(instateL),
            width=24 * 60 * 60 * 1000, fill_alpha=0.2, color=colorv)
    # Field references are written "@name"; the original "@$name" is not valid tooltip syntax.
    p6.add_tools(HoverTool(tooltips=[("caseperday", "@caseperday"), ('m50', "@m50")]))
    p6.add_layout(p6.legend[0], 'below')
    p6.add_layout(Title(text="Figure 6.1", text_font_size="12pt", text_font_style='italic',
                        text_color='blue'), 'below')
    # The original `toolbar_location = 'left',` only bound a local tuple; set it on the figure.
    p6.toolbar_location = 'left'
    return p6
# function for the right hand side of the interactive plot
def updateR(instateR):
    """Build the right-hand ("Figure 6.2") mobility-vs-cases chart for one state.

    Mobility (``m50``) is drawn as bars on the primary y axis and cases per day
    as a dashed line on a secondary y axis.

    Args:
        instateR: State name used to filter ``data_DL_Mobilityplt``.

    Returns:
        The configured Bokeh figure.
    """
    colorv, line_colorl = color()
    q6 = plotpreset(instateR)
    df = data_DL_Mobilityplt[data_DL_Mobilityplt['state'] == instateR]
    # Both glyphs share one data source so the hover tool can look up
    # @caseperday and @m50. The original passed raw arrays and left the source
    # unused, so its tooltips could not resolve any field.
    source = ColumnDataSource(df[['date', 'caseperday', 'm50']])
    # Secondary axis scaled to this state's own cases-per-day range.
    q6.extra_y_ranges = {"caseperday": Range1d(start=df['caseperday'].min(), end=df['caseperday'].max())}
    q6.add_layout(LinearAxis(y_range_name="caseperday", axis_label="Covid 19 Case Ratio"), 'right')
    q6.line(x='date', y='caseperday', source=source,
            legend_label="Cases per day {}".format(instateR), y_range_name="caseperday",
            line_dash='dashed', line_alpha=1, line_width=3, line_color=line_colorl)
    # Bar width is one day expressed in milliseconds (datetime axis units).
    q6.vbar(x='date', top='m50', source=source,
            legend_label="Mobility Median-50 {}".format(instateR),
            width=24 * 60 * 60 * 1000, fill_alpha=0.2, color=colorv)
    # Field references are written "@name"; the original "@$name" is not valid tooltip syntax.
    q6.add_tools(HoverTool(tooltips=[("caseperday", "@caseperday"), ('m50', "@m50")]))
    q6.add_layout(q6.legend[0], 'below')
    q6.add_layout(Title(text="Figure 6.2", text_font_size="12pt", text_font_style='italic',
                        text_color='blue'), 'below')
    # The original `toolbar_location = 'right',` only bound a local tuple; set it on the figure.
    q6.toolbar_location = 'right'
    return q6
# Function to load the interactive plot and save output
def update(Mostaffectedstates, Leastaffectedstates):
    """Show two state charts side by side and save them to HTML.

    Args:
        Mostaffectedstates: State shown in the left chart ("Figure 6.1").
        Leastaffectedstates: State shown in the right chart ("Figure 6.2").

    Returns:
        None.
    """
    # The left slot uses the left-hand builder so Figure 6.1 appears before
    # Figure 6.2; the original had the two builders swapped.
    left_plot = updateL(Mostaffectedstates)
    right_plot = updateR(Leastaffectedstates)
    r6 = row(left_plot, right_plot)
    # The original called save() here, before output_file() was configured for
    # this plot, so the row was written to whichever file the output state
    # pointed at from an earlier cell. The save below is the intended one.

    def _show_and_save():
        # Display in the notebook, then write the standalone HTML file.
        # NOTE(review): notebook_handle is an option of show(); confirm that
        # save() accepts it in the installed Bokeh version.
        bop.output_notebook()
        bop.show(r6)
        # create an output file
        output_file('Covid-19_Mobilitym50_state_by_state_Jan_2020_July_2020.html')
        save(r6, notebook_handle=True)

    try:
        bop.reset_output()
        _show_and_save()
    except Exception:  # was a bare `except:`; retry once without resetting the output state
        _show_and_save()
    # Refer to site for plot: https://akrishn986.github.io/Covid19-Cause-EffectAnalysis/19_average_cases_by_Month_per_capita_Jan_2020_July_2020.html
from ipywidgets import widgets
# Both selectors offer every state present in the merged mobility/covid frame;
# update() is called with the two selected values.
interact(update, Mostaffectedstates=data_DL_Mobilityplt['state'].unique(),Leastaffectedstates=data_DL_Mobilityplt['state'].unique())
From the comparison above, we can see that mobility has a possible delayed effect on the case ratio. We can further investigate this by plotting the case ratio per week and the average median values of samples per week.
Refer to site for plot: https://akrishn986.github.io/Covid-19_Mobilitym50_state_by_state_Jan_2020_July_2020_NYFL.html
Refer to site for plot:https://akrishn986.github.io/Covid-19_Mobilitym50_state_by_state_Jan_2020_July_2020_NJAZ.html
Refer to site for plot:https://akrishn986.github.io/Covid-19_Mobilitym50_state_by_state_Jan_2020_July_2020_MALO.html
Refer to site for plot:https://akrishn986.github.io/Covid-19_Mobilitym50_state_by_state_Jan_2020_July_2020_MAME.html
Refer to site for plot:https://akrishn986.github.io/Covid-19_Mobilitym50_state_by_state_Jan_2020_July_2020_NYVT.html
Refer to site for plot:https://akrishn986.github.io/Covid-19_Mobilitym50_state_by_state_Jan_2020_July_2020_NJKY.html
Refer to site for plot:https://akrishn986.github.io/Covid-19_Mobilitym50_state_by_state_Jan_2020_July_2020_AZOR.html
Refer to site for plot:https://akrishn986.github.io/Covid-19_Mobilitym50_state_by_state_Jan_2020_July_2020_FLWV.html
#Plot mobility average of median vs cases per week delayed by 3 weeks
data_DL_Mobility.head(2)
# 'Day' here is the day of the year, used below as the frame number of the map sequence.
data_DL_Mobility['Day']=data_DL_Mobility['date'].dt.dayofyear
data_DL_MobilitypltdayVid=data_DL_Mobility.sort_values(by=['admin1','Day'], ascending=True)
data_DL_MobilitypltdayVid=data_DL_MobilitypltdayVid.loc[:,['admin1','Day','m50']]
data_DL_MobilitypltdayVid.rename(columns={'admin1': 'state'},inplace=True)
# Keep only the states that are also present in the covid per-day frame.
data_DL_MobilitypltdayVid= data_DL_MobilitypltdayVid[data_DL_MobilitypltdayVid.state.isin(data_covid_perday['state'].unique())]
# Attach the state boundary shapes (shapefile 'NAME' matched against 'state').
data_DL_MobilitypltdayVid = contiguous_usa.merge(data_DL_MobilitypltdayVid, left_on = 'NAME', right_on = 'state')
data_DL_MobilitypltdayVid.head(5)
# First and last day-of-year available for the map sequence.
data_DL_MobilitypltdayVid['Day'].min()
data_DL_MobilitypltdayVid['Day'].max()
from bokeh.io import export_png
from bokeh.io import export_svgs
from bokeh.themes import built_in_themes
def bkpltmobday(dayst,dayend):
    """Export one PNG map of state mobility (``m50``) per day of the year.

    For every day ``i`` in ``range(dayst, dayend)`` the rows of
    ``data_DL_MobilitypltdayVid`` with ``Day == i`` are drawn as patches
    coloured by ``m50`` and written to ``PNGIMG/outputimage_day<NNNN>.png``,
    where ``<NNNN>`` is the day zero-padded to four digits.

    Args:
        dayst: first day of the year to render (inclusive).
        dayend: day of the year at which to stop (exclusive).
    """
    # Function-scope import: the file's module-level ``import os`` only appears
    # after the first calls to this function.
    import os

    out_dir = 'PNGIMG'
    # Make sure the frame folder exists before the first export.
    os.makedirs(out_dir, exist_ok=True)

    # Reverse the palette so higher values get darker colours; it does not
    # depend on the day, so build it once.
    palettepc = magma(60)[::-1]

    for i in range(dayst, dayend):
        # Days 111 and 151 are skipped (marked as missing data by the author).
        if i in (111, 151):
            continue

        data_DL_Mobilitypltdaymap = data_DL_MobilitypltdayVid[
            data_DL_MobilitypltdayVid['Day'] == i
        ]
        geosourcemblitdymap = GeoJSONDataSource(
            geojson=data_DL_Mobilitypltdaymap.to_json()
        )

        # The colour scale is fitted to this day's own min/max m50.
        color_mapperpc = LinearColorMapper(
            palette=palettepc,
            low=data_DL_Mobilitypltdaymap['m50'].min(),
            high=data_DL_Mobilitypltdaymap['m50'].max(),
        )

        # Bare figure: no toolbar, axes, grid or borders, dark background.
        pc = figure(title='Day {}'.format(i), plot_height=500, plot_width=550)
        pc.toolbar.logo = None
        pc.toolbar_location = None
        pc.axis.visible = False
        pc.xgrid.visible = False
        pc.ygrid.visible = False
        pc.min_border_left = 0
        pc.min_border_right = 0
        pc.min_border_top = 0
        pc.min_border_bottom = 0
        pc.background_fill_alpha = 0.8
        pc.background_fill_color = 'Black'

        pc.patches(
            'xs', 'ys',
            source=geosourcemblitdymap,
            fill_color={'field': 'm50', 'transform': color_mapperpc},
            line_color='gray',
            line_width=0.25,
            fill_alpha=1,
        )

        # os.path.join instead of a hard-coded backslash: 'PNGIMG\...' only
        # names a file inside PNGIMG on Windows, and '\o' is an invalid escape.
        fname = os.path.join(out_dir, 'outputimage_day{:04d}.png'.format(i))
        export_png(pc, filename=fname)
# Render the daily frames in three batches. bkpltmobday iterates
# range(dayst, dayend), so the end day is exclusive: these calls cover days
# 61-110, 112-149 and 152-200 (151 is skipped inside bkpltmobday), i.e. days
# 111, 150 and 151 produce no frame.
bkpltmobday(61,111)
bkpltmobday(112,150)
bkpltmobday(151,201)
import os
import moviepy.video.io.ImageSequenceClip
# Folder that is scanned below for the exported .png frames.
image_folder='PNGIMG'
# Frames per second passed to ImageSequenceClip when building the video.
fps=5
import re
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Return a sort key that orders embedded numbers by value.

    ``s`` is split on runs of ASCII digits; each digit run becomes an ``int``
    and every other piece is lower-cased, so ``'img2.png'`` sorts before
    ``'img10.png'`` and case is ignored.

    Args:
        s: the string to build a key for (e.g. a file name).
        _nsre: compiled pattern used for splitting; callers normally leave
            the default.

    Returns:
        list: alternating lower-cased text pieces and integers.
    """
    # str.isdigit() alone is also true for non-ASCII digits such as '²', which
    # int() rejects; restrict the conversion to the ASCII runs the pattern
    # actually captures so every other piece stays text.
    return [
        int(text) if text.isascii() and text.isdigit() else text.lower()
        for text in _nsre.split(s)
    ]
# Collect every .png in the frame folder, order the frames naturally (by the
# number embedded in the file name) and stitch them into an mp4.
image_files = [
    image_folder + '/' + entry
    for entry in os.listdir(image_folder)
    if entry.endswith(".png")
]
sorted_images = list(image_files)
sorted_images.sort(key=natural_sort_key)
clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(
    sorted_images,
    fps=fps,
)
clip.write_videofile('MobilitybyDayTimeLapse.mp4')
from IPython.display import HTML
# Embed the generated video in the notebook output.
HTML("""
<video alt="test" controls>
<source src="MobilitybyDayTimeLapse.mp4" type="video/mp4">
</video>
""")
# Weekly mobility: mean of m50 per (state, Week), i.e. the average of the
# median values per week, stored as 'Averagemedian50'.
data_DL_Mobilitypltwkmap = (
    data_DL_Mobilityplt
    .sort_values(by=['state', 'date'], ascending=True)
    .groupby(['state', 'Week'])[['m50']]
    .mean()
    .reset_index()
    .rename(columns={'m50': 'Averagemedian50'})
)
# Attach the weekly values to contiguous_usa by matching NAME to the state.
data_DL_Mobilitypltwkmap = contiguous_usa.merge(
    data_DL_Mobilitypltwkmap,
    left_on='NAME',
    right_on='state',
)
data_DL_Mobilitypltwkmap.head(2)
data_DL_Mobilitypltwkmap['Averagemedian50'].min()
data_DL_Mobilitypltwkmap['Averagemedian50'].max()
data_DL_Mobilitypltwkmap['Week'].min()
data_DL_Mobilitypltwkmap['Week'].max()

# Weekly covid case ratio, merged with contiguous_usa the same way.
data_covid_perweekmapdata = contiguous_usa.merge(
    data_covid_perweek.sort_values(by=['state', "Week"], ascending=True),
    left_on='NAME',
    right_on='state',
)
data_covid_perweekmapdata['caseperweekratio'].max()
data_covid_perweekmapdata['caseperweekratio'].min()
# Multiple map plots per week
def bkplt71():
    """Build one map of weekly average median mobility per week.

    Weeks run from 9 up to, but not including, the largest ``Week`` in
    ``data_DL_Mobilitypltwkmap`` minus 4. Each figure colours the state
    patches by ``Averagemedian50`` on a scale fitted to that week's own
    minimum and maximum, with a hover tool and a horizontal colour bar below.

    Returns:
        list: the figures, in week order.
    """
    figures = []
    stop_week = data_DL_Mobilitypltwkmap['Week'].max() - 4
    for week in range(9, stop_week):
        week_rows = data_DL_Mobilitypltwkmap[data_DL_Mobilitypltwkmap['Week'] == week]
        geo_source = GeoJSONDataSource(geojson=week_rows.to_json())

        # Reversed magma: higher values get darker colours.
        reversed_magma = magma(60)[::-1]
        mapper = LinearColorMapper(
            palette=reversed_magma,
            low=week_rows['Averagemedian50'].min(),
            high=week_rows['Averagemedian50'].max(),
        )

        # Custom tick labels for the colour bar.
        label_overrides = {'0': '0', '50': '50', '80': '80', '100': '100',
                           '150': '150', '200': '200', '300': '300'}
        legend_bar = ColorBar(
            color_mapper=mapper,
            label_standoff=8,
            width=500,
            height=20,
            border_line_color=None,
            location=(0, 0),
            orientation='horizontal',
            major_label_overrides=label_overrides,
        )

        fig = figure(
            title='Average Median Mobility per Week {} by State '.format(week),
            plot_height=500,
            plot_width=550,
            toolbar_location='left',
            tools="pan, wheel_zoom, box_zoom, reset",
        )
        fig.xgrid.grid_line_color = None
        fig.ygrid.grid_line_color = None

        # State outlines filled according to the colour mapper.
        state_patches = fig.patches(
            'xs', 'ys',
            source=geo_source,
            fill_color={'field': 'Averagemedian50', 'transform': mapper},
            line_color='gray',
            line_width=0.25,
            fill_alpha=1,
        )

        heading = fig.title
        heading.text_color = "Black"
        heading.text_font = "Segoe UI"
        heading.text_font_style = "bold italic"
        heading.align = 'center'
        heading.text_font_size = "10pt"

        hover = HoverTool(
            renderers=[state_patches],
            tooltips=[('state', '@NAME'),
                      ('Average median Mobility by Week', '@Averagemedian50')],
        )
        fig.add_tools(hover)
        fig.add_layout(legend_bar, 'below')
        figures.append(fig)
    return figures
# Multiple map plots per week
def bkplt72():
    """Build one map of the weekly covid case ratio per week.

    Weeks run from 13 up to, but not including, the largest ``Week`` in
    ``data_covid_perweekmapdata``. Each figure colours the state patches by
    ``caseperweekratio`` on a scale fitted to that week's own minimum and
    maximum, with a hover tool and a horizontal colour bar below.

    Returns:
        list: the figures, in week order.
    """

    def _figure_for_week(week_no):
        # Rows of the merged table for this single week.
        subset = data_covid_perweekmapdata[data_covid_perweekmapdata['Week'] == week_no]
        source = GeoJSONDataSource(geojson=subset.to_json())

        # Reverse magma so higher values have darker colours.
        shades = magma(60)
        shades = shades[::-1]
        ratio_mapper = LinearColorMapper(palette=shades,
                                         low=subset['caseperweekratio'].min(),
                                         high=subset['caseperweekratio'].max())

        # Custom tick labels for the colour bar.
        overrides = {'0': '0', '50': '50', '80': '80', '100': '100',
                     '150': '150', '200': '200', '300': '300'}
        bar = ColorBar(color_mapper=ratio_mapper,
                       label_standoff=8,
                       width=500, height=20,
                       border_line_color=None,
                       location=(0, 0),
                       orientation='horizontal',
                       major_label_overrides=overrides)

        chart = figure(title='Covid case ratio per Week {} by State '.format(week_no),
                       plot_height=500,
                       plot_width=550,
                       toolbar_location='left',
                       tools="pan, wheel_zoom, box_zoom, reset")
        chart.xgrid.grid_line_color = None
        chart.ygrid.grid_line_color = None

        outlines = chart.patches('xs', 'ys', source=source,
                                 fill_color={'field': 'caseperweekratio',
                                             'transform': ratio_mapper},
                                 line_color='gray',
                                 line_width=0.25,
                                 fill_alpha=1)

        chart.title.text_color = "Black"
        chart.title.text_font = "Segoe UI"
        chart.title.text_font_style = "bold italic"
        chart.title.align = 'center'
        chart.title.text_font_size = "10pt"

        chart.add_tools(HoverTool(renderers=[outlines],
                                  tooltips=[('state', '@NAME'),
                                            ('Caseratio by Week', '@caseperweekratio')]))
        chart.add_layout(bar, 'below')
        return chart

    final_week = data_covid_perweekmapdata['Week'].max()
    plots72 = [_figure_for_week(week_no) for week_no in range(13, final_week)]
    return plots72
def pltlayout():
    """Show and save the week-by-week mobility / case-ratio maps as four tabs.

    Tab ``n`` (1-4) holds a 'Figure 7.n' caption, a row of four mobility maps
    from ``bkplt71()`` and, below it, a row of four case-ratio maps from
    ``bkplt72()``; the tabs use plots 0-3, 4-7, 8-11 and 12-15. The tabs are
    shown in the notebook and saved to
    'Covid-19_Mobilitym50andCaseRatio_week_byweek_Jan_2020_July_2020.html'.

    Raises:
        IndexError: if either plot list has fewer than 16 figures.
    """
    plots71 = bkplt71()
    plots72 = bkplt72()

    per_tab = 4
    tabs = []
    for tab_no in range(1, 5):
        first = (tab_no - 1) * per_tab
        caption = Div(text='Figure 7.{}'.format(tab_no),
                      style={'font-size': '100%', 'color': 'blue', 'align-self': 'flex-end'})
        # Explicit indexing (not slicing) so a short plot list still fails loudly.
        mobility_row = [plots71[k] for k in range(first, first + per_tab)]
        covid_row = [plots72[k] for k in range(first, first + per_tab)]
        tabs.append(Panel(
            child=layout(caption, mobility_row, covid_row),
            title="Week by Week Comparison of Mobility And Covid Case Ratio Tab{}".format(tab_no),
        ))
    Finlayout7 = Tabs(tabs=tabs, sizing_mode='scale_width')

    def _render():
        # NOTE(review): `bop` is not defined in the visible part of the file;
        # presumably an alias for bokeh.plotting - confirm.
        bop.output_notebook()
        bop.show(Finlayout7)
        # create an output file
        output_file('Covid-19_Mobilitym50andCaseRatio_week_byweek_Jan_2020_July_2020.html')
        save(Finlayout7, notebook_handle=True)

    try:
        bop.reset_output()
        _render()
    except Exception:
        # If the attempt with a reset output state fails, retry without the
        # reset. Unlike a bare `except:`, KeyboardInterrupt and SystemExit
        # are no longer swallowed here.
        _render()
def pltlayoutalt():
    """Show and save the weekly maps as four tabs of side-by-side pairs.

    Tab ``n`` (1-4) holds a 'Figure 7.n' caption on its own line followed by
    four rows; each row pairs the mobility map ``bkplt71()[k]`` with the
    case-ratio map ``bkplt72()[k]``. The tabs use k = 0-3, 4-7, 8-11 and
    12-15. The tabs are shown in the notebook and saved to
    'Covid-19_Mobilitym50andCaseRatio_week_byweek_Jan_2020_July_2020.html'.

    Raises:
        IndexError: if either plot list has fewer than 16 figures.
    """
    plots71 = bkplt71()
    plots72 = bkplt72()

    per_tab = 4
    tabs = []
    for tab_no in range(1, 5):
        first = (tab_no - 1) * per_tab
        caption = Div(text='Figure 7.{}'.format(tab_no),
                      style={'font-size': '100%', 'color': 'blue', 'align-self': 'flex-end'})
        # One [mobility, covid] row per week. The caption is always a separate
        # layout item; previously the fourth tab put it inside the first row.
        pair_rows = [[plots71[k], plots72[k]] for k in range(first, first + per_tab)]
        tabs.append(Panel(
            child=layout(caption, *pair_rows),
            title="Week by Week Comparison of Mobility And Covid Case Ratio Tab{}".format(tab_no),
        ))
    Finlayout7 = Tabs(tabs=tabs, sizing_mode='scale_width')

    def _render():
        # NOTE(review): `bop` is not defined in the visible part of the file;
        # presumably an alias for bokeh.plotting - confirm.
        bop.output_notebook()
        bop.show(Finlayout7)
        # create an output file
        output_file('Covid-19_Mobilitym50andCaseRatio_week_byweek_Jan_2020_July_2020.html')
        save(Finlayout7, notebook_handle=True)

    try:
        bop.reset_output()
        _render()
    except Exception:
        # If the attempt with a reset output state fails, retry without the
        # reset. Unlike a bare `except:`, KeyboardInterrupt and SystemExit
        # are no longer swallowed here.
        _render()
# Build, display and save the tabbed layout (four mobility maps above four
# case-ratio maps per tab).
pltlayout()